home *** CD-ROM | disk | FTP | other *** search
Wrap
<?php
// ----------------------------------------------------------------------------
// Zoom Search Engine 4.0 (10/3/2005)
// PHP search front-end
// A fast custom website search engine using pre-indexed data files.
// Copyright (C) Wrensoft 2000 - 2004
//
// This script is designed for PHP 4.0 + only.
//
// email: zoom@wrensoft.com
// www: http://www.wrensoft.com
// ----------------------------------------------------------------------------

// Refuse to run on anything older than PHP 4.0.0.
// A plain strcmp() orders version strings lexically (so "10.0.0" would sort
// before "4.0.0"); use version_compare() whenever it is available.
if (function_exists('version_compare'))
    $PHPTooOld = version_compare(phpversion(), '4.0.0', '<');
else
    $PHPTooOld = (strcmp('4.0.0', phpversion()) > 0);
if ($PHPTooOld)
    die("This version of the script requires PHP 4.0.0 or higher.<br />");

// Check for dependent files BEFORE settings.php is pulled in: require() is
// fatal when the file is missing, which would make this check unreachable.
if (!file_exists("settings.php") ||
    !file_exists("zoom_wordmap.zdat") ||
    !file_exists("zoom_dictionary.zdat") ||
    !file_exists("zoom_pages.zdat") ||
    !file_exists("zoom_titles.zdat"))
{
    print("<b>Zoom files missing error:</b> Zoom is missing one or more of the required index data files.<br />Please make sure the generated index files are uploaded to the same path as this search script.<br />");
    return;
}

require("settings.php");

if ($Spelling == 1 && !file_exists("zoom_spelling.zdat"))
    print("<b>Zoom files missing error:</b> Zoom is missing the 'zoom_spelling.zdat' file required for the Spelling Suggestion feature which has been enabled.<br />");

// ----------------------------------------------------------------------------
// Settings
// ----------------------------------------------------------------------------

// The options available in the dropdown menu for number of results
// per page
$PerPageOptions = array(10, 20, 50, 100);

/*
// For foreign language support, setlocale may be required on the server for
// wildcards and highlighting to work. Uncomment the following lines and specify
// the appropriate locale information
//if (setlocale(LC_ALL, "ru_RU.cp1251") == false) // for russian
//    print("Failed to change locale setting or locale setting invalid");
*/

// ----------------------------------------------------------------------------
// Parameter initialisation
// ----------------------------------------------------------------------------

// Send HTTP header to define meta charset
if (isset($Charset) && strlen($Charset) > 0)
    header("Content-Type: text/html; charset=" . $Charset);

// For versions of PHP before 4.1.0
// we will emulate the superglobals by creating references
// NOTE: references created are NOT superglobals
if (!isset($_SERVER) && isset($HTTP_SERVER_VARS))
    $_SERVER = &$HTTP_SERVER_VARS;
if (!isset($_GET) && isset($HTTP_GET_VARS))
    $_GET = &$HTTP_GET_VARS;
if (!isset($_POST) && isset($HTTP_POST_VARS))
    $_POST = &$HTTP_POST_VARS;

// Magic quotes no longer exist from PHP 5.4 on and the related functions have
// since been removed, so only consult them on interpreters that can still
// have the feature switched on.
$MagicQuotesPossible = !function_exists('version_compare') || version_compare(phpversion(), '5.4.0', '<');

// check if magic_quotes are on for Get/Post/Cookie variables
// and fix accordingly (we don't use cookies so we leave them out)
if ($MagicQuotesPossible && function_exists('get_magic_quotes_gpc') && get_magic_quotes_gpc() == 1)
{
    // foreach instead of each(): each() is not available in current PHP.
    // Only plain strings are unescaped; array-valued parameters are left alone.
    foreach ($_GET as $key => $value)
    {
        if (is_string($value))
            $_GET[$key] = stripslashes($value);
    }
    foreach ($_POST as $key => $value)
    {
        if (is_string($value))
            $_POST[$key] = stripslashes($value);
    }
}

// check magic_quotes for runtime stuff (reading from files, etc)
if ($MagicQuotesPossible && function_exists('get_magic_quotes_runtime') && get_magic_quotes_runtime() == 1)
    set_magic_quotes_runtime(0);

// we use the method=GET and 'query' parameter now (for sub-result pages etc)
// A non-string value (e.g. zoom_query[]=x) is treated as "no query".
if (isset($_GET['zoom_query']) && is_string($_GET['zoom_query']))
    $query = $_GET['zoom_query'];
else
    $query = "";

// number of results per page, defaults to 10 if not specified.
// Forced to a positive integer: the value is echoed into the page and used as
// a divisor when the number of result pages is calculated.
if (isset($_GET['zoom_per_page']))
    $per_page = intval($_GET['zoom_per_page']);
else
    $per_page = 10;
if ($per_page < 1)
    $per_page = 10;

// current result page number, defaults to the first page if not specified
$NewSearch = 0;
if (isset($_GET['zoom_page']))
{
    $page = intval($_GET['zoom_page']);
    if ($page < 1)
        $page = 1;
}
else
{
    $page = 1;
    $NewSearch = 1;
}

// AND operator.
// 1 if we are searching for ALL terms
// 0 if we are searching for ANY terms (default)
// All numeric request parameters are forced to integers because they are
// written straight back into the generated HTML (form fields and links).
if (isset($_GET['zoom_and']))
    $and = intval($_GET['zoom_and']);
elseif (isset($DefaultToAnd) && $DefaultToAnd == 1)
    $and = 1;
else
    $and = 0;

// for category support
if (isset($_GET['zoom_cat']))
    $cat = intval($_GET['zoom_cat']);
else
    $cat = -1;  // search all categories

// for sorting options
// zero is default (relevance)
// 1 is sort by date (if Date/Time is available)
if (isset($_GET['zoom_sort']))
    $sort = intval($_GET['zoom_sort']);
else
    $sort = 0;

// URL the form and the result links point back to.
// PHP_SELF can carry attacker-supplied path info, so it is HTML-escaped
// before it is placed inside attributes.
if (isset($LinkBackURL) == false || strlen($LinkBackURL) < 1)
    $SelfURL = htmlspecialchars($_SERVER['PHP_SELF']);
else
    $SelfURL = $LinkBackURL;

// init. link target string
$target = "";
if ($UseLinkTarget == 1)
    $target = " target=\"" . $LinkTarget . "\" ";

// ----------------------------------------------------------------------------
// Functions
// ----------------------------------------------------------------------------

// Prints the optional "powered by" line and then every template line that has
// not been printed yet, starting at the global $template_line cursor.
//   $template - array of template lines
function PrintEndOfTemplate($template)
{
    global $ZoomInfo;
    global $STR_POWEREDBY;
    global $template_line;

    //Let others know about Zoom.
    if ($ZoomInfo == 1)
        print("<center><p><small>" . $STR_POWEREDBY . " <a href=\"http://www.wrensoft.com/zoom/\" target=\"_blank\"><b>Zoom Search Engine</b></a></small></p></center>");

    //Print out the end of the template
    $total = count($template);
    while ($template_line < $total)
    {
        print($template[$template_line]);
        $template_line++;
    }
}

// Prints $line with every occurrence of the current search words wrapped in
// <span class="highlight">.
// The entries of $SearchWords are used as regular expression fragments (the
// main loop stores the converted pattern of wildcard terms there), so they
// are deliberately not passed through preg_quote() here.
function PrintHighlightDescription($line)
{
    global $HighlightColor;
    global $SearchWords;
    global $numwords;
    global $SearchAsSubstring;

    $res = $line;
    for ($i = 0; $i < $numwords; $i++)
    {
        if (strlen($SearchWords[$i]) < 1)
            continue;

        // replace with marker text, assumes [;:] and [:;] is not the search text...
        if ($SearchAsSubstring == 1)
            $marked = preg_replace('/(' . $SearchWords[$i] . ')/i', '[;:]$1[:;]', $res);
        else
            $marked = preg_replace('/(\W|\A|\b)(' . $SearchWords[$i] . ')(\W|\Z|\b)/i', '$1[;:]$2[:;]$3', $res);

        // preg_replace() yields NULL when the pattern cannot be used; keep the
        // text as it was rather than printing nothing at all.
        if ($marked !== null)
            $res = $marked;
    }
    // replace the marker text with the html text
    // this is to avoid finding previous <span>'ed text.
    $res = str_replace("[;:]", "<span class=\"highlight\">", $res);
    $res = str_replace("[:;]", "</span>", $res);
    print $res;
}

// Returns (does not print) the localized "N result(s)" text for $num.
function PrintNumResults($num)
{
    global $STR_NO_RESULTS, $STR_RESULT, $STR_RESULTS;

    if ($num == 0)
        return $STR_NO_RESULTS;
    if ($num == 1)
        return $num . " " . $STR_RESULT;
    return $num . " " . $STR_RESULTS;
}

// ----------------------------------------------------------------------------
// Compares the two values, used for sorting output results
// Results that match all search terms are put first, highest score
// (descending on element [2], ties broken by descending element [1])
// ----------------------------------------------------------------------------
function SortCompare ($a, $b)
{
    if ($a[2] < $b[2])
        return 1;
    if ($a[2] > $b[2])
        return -1;

    if ($a[1] < $b[1])
        return 1;
    if ($a[1] > $b[1])
        return -1;
    return 0;
}

// usort() callback ordering results by the global $datetime value of their
// page index (element [0]), newest first.
function SortByDate ($a, $b)
{
    global $datetime;

    if ($datetime[$a[0]] < $datetime[$b[0]])
        return 1;
    if ($datetime[$a[0]] > $datetime[$b[0]])
        return -1;

    // if equal dates/time, return based on sw matched and score
    return SortCompare($a, $b);
}

// ----------------------------------------------------------------------------
// Translates a typical shell wildcard pattern ("zoo*" => "zoom" etc.)
// to a regular expression pattern. Supports only '*' and '?' characters.
// Every other character is escaped for use between '/' delimiters.
// ----------------------------------------------------------------------------
function pattern2regexp($pattern)
{
    // The former str_replace() calls meant to escape '$' and '#' had their
    // arguments in the wrong order and threw the result away, so they never
    // did anything; preg_quote() below takes care of the escaping.
    $len = strlen($pattern);
    $res = "";
    for ($i = 0; $i < $len; $i++)
    {
        $c = $pattern[$i];
        if ($c == '*')
            $res .= '[\d\S]*';
        else if ($c == '?')
            $res .= '.';
        else if ($c == '.')
            $res .= '\.';
        else
            $res .= preg_quote($c, '/');
    }
    return $res;
}

// Returns the index of $word in the global dictionary (case-insensitive
// linear scan), or -1 when the word is not present.
function GetDictID($word)
{
    global $dict;
    global $dict_count;

    for ($i = 0; $i < $dict_count; $i++)
    {
        if (strcasecmp($dict[$i][0], $word) == 0)
            return $i;
    }
    return -1;  // not found
}

// Reads the next dictionary word ID from the page text file.
// IDs are stored as little-endian 16-bit values; a value of 65535 means the
// following pair has to be added on top.
// A short read (end of file) counts as a zero pair, which ends the ID.
function GetNextDictWord($fp_pagetext)
{
    $word_id = 0;
    do
    {
        $bytes_buffer = fread($fp_pagetext, 2); // grab 2 bytes
        if (!is_string($bytes_buffer) || strlen($bytes_buffer) < 2)
            break;
        $dict_id = ord($bytes_buffer[0]) | ord($bytes_buffer[1])<<8;
        $word_id += $dict_id;
    } while ($dict_id >= 65535);
    return $word_id;
}

// Marks search word number $sw as skipped: adds it to the list shown to the
// user, blanks it in $SearchWords and increments the skipped counter.
function SkipSearchWord($sw)
{
    global $SearchWords;
    global $SkippedWords;
    global $SkippedOutputStr;

    if (!isset($SkippedOutputStr))
        $SkippedOutputStr = "";

    if ($SearchWords[$sw] != "")
    {
        if ($SkippedWords > 0)
            $SkippedOutputStr .= ", ";
        // the word comes from the request, so escape it for HTML output
        $SkippedOutputStr .= "\"<b>" . htmlspecialchars($SearchWords[$sw]) . "</b>\"";
        $SearchWords[$sw] = "";
    }
    $SkippedWords++;
}

// Builds the 4 character, Soundex-like code used to look up spelling
// suggestions: the first letter of the word followed by three digits.
// Returns "" when no code can be built.
function GetSPCode($word)
{
    $tmpword = strtolower($word);
    // strip out non alphabetic characters
    $tmpword = preg_replace("/[^a-z]/", "", $tmpword);
    $wordlen = strlen($tmpword);
    if ($wordlen < 1)
        return "";

    $spcode = $tmpword[0];
    $tmpword = substr($tmpword, 1);

    // Map each remaining letter to its digit group.
    // NOTE(review): 'n' has no mapping (the original listed "m" twice), so it
    // is dropped by the digit filter below. Left unchanged because the codes
    // must agree with those stored in zoom_spelling.zdat - confirm against
    // the indexer before mapping 'n' to "5".
    $tmpword = strtr($tmpword,
        "aeiouhwybpfvcgjkqsxzdtlmr",
        "0000000011112222222233456");

    // Remove any adjacent digits
    $char = "";
    $nextChar = "";
    $tmpstr = "";
    for ($i = 0; $i < $wordlen; $i++)
    {
        $char = substr($tmpword, $i, 1);
        $nextChar = substr($tmpword, $i+1, 1);
        if (is_numeric($char) && $char != $nextChar)
            $tmpstr = $tmpstr . $char;
    }
    $tmpword = $tmpstr;
    if (strlen($tmpword) < 1)
        return "";

    $tmpword = str_replace("0", "", $tmpword);

    // zero pad it out 4 characters
    $tmpword = str_pad($tmpword, 4, "0");
    $spcode = $spcode . substr($tmpword, 0, 3);

    return $spcode;
}

// ----------------------------------------------------------------------------
// Starts here
// ----------------------------------------------------------------------------

if ($Timing == 1 || $Logging == 1)
{
    $mtime = explode(" ", microtime());
    $starttime = doubleval($mtime[1]) + doubleval($mtime[0]);
}

// Defensive: everything below treats the query as a string.
if (!isset($query) || !is_string($query))
    $query = "";

//Open and print start of result page template
$template = file ($TemplateFilename);
if ($template === false)
    $template = array();            // unreadable template: print nothing around the results
$numtlines = count ($template);     //Number of lines in the template
$template_line = 0;
while ($template_line < $numtlines)
{
    if (stristr($template[$template_line], "<!--ZOOMSEARCH-->"))
        break;
    print($template[$template_line]);
    $template_line++;
}
$template_line++;   // step over the <!--ZOOMSEARCH--> marker line

if ($UseCats)
{
    if (file_exists("zoom_cats.zdat") && file_exists("zoom_catpages.zdat"))
    {
        $catnames = file("zoom_cats.zdat");
        $catpages = file("zoom_catpages.zdat");
    }
    else
    {
        print("Zoom config error: Missing file(s) zoom_cats.zdat and zoom_catpages.zdat required for category enabled search mode");
        return;
    }

    // an unknown category number falls back to "all categories"
    if (!is_array($catnames) || $cat < -1 || $cat >= count($catnames))
        $cat = -1;
}

print("<!--Zoom Search Engine ".$Version."-->\n");

// Replace the key text <!--ZOOMSEARCH--> with the following
if ($FormFormat > 0)
{
    // Request values are escaped again here so that this output is safe no
    // matter how they were initialised.
    $per_page_html = htmlspecialchars($per_page);
    $and_html = htmlspecialchars($and);
    $sort_html = htmlspecialchars($sort);

    // Insert the form
    print("<form method=\"get\" action=\"".$SelfURL."\" class=\"zoom_searchform\">\n");
    print($STR_FORM_SEARCHFOR . " <input type=\"text\" name=\"zoom_query\" size=\"20\" value=\"".htmlspecialchars($query)."\" class=\"zoom_searchbox\" />\n");
    print("<input type=\"submit\" value=\"" . $STR_FORM_SUBMIT_BUTTON . "\" class=\"zoom_button\" />\n");
    if ($FormFormat == 2)
    {
        print("<span class=\"zoom_options\">" . $STR_FORM_RESULTS_PER_PAGE . "\n");
        print("<select name='zoom_per_page'>\n");
        foreach ($PerPageOptions as $ppo)
        {
            print("<option");
            if ($ppo == $per_page)
                print(" selected=\"selected\"");
            print(">". $ppo ."</option>\n");
        }
        print("</select>\n<br /><br />\n");
        if ($UseCats)
        {
            print($STR_FORM_CATEGORY . " ");
            print("<select name='zoom_cat'>");
            // 'all cats option
            print("<option value=\"-1\">" . $STR_FORM_CATEGORY_ALL . "</option>");
            for($i = 0; $i < count($catnames); $i++)
            {
                print("<option value=\"". $i . "\"");
                if ($i == $cat)
                    print(" selected=\"selected\"");
                print(">". $catnames[$i] . "</option>");
            }
            print("</select> \n");
        }
        print($STR_FORM_MATCH . "\n");
        if ($and == 0)
        {
            print("<input type=\"radio\" name=\"zoom_and\" value=\"0\" checked=\"checked\" />" . $STR_FORM_ANY_SEARCH_WORDS . "\n");
            print("<input type=\"radio\" name=\"zoom_and\" value=\"1\" />" . $STR_FORM_ALL_SEARCH_WORDS . "\n");
        }
        else
        {
            print("<input type=\"radio\" name=\"zoom_and\" value=\"0\" />" . $STR_FORM_ANY_SEARCH_WORDS . "\n");
            print("<input type=\"radio\" name=\"zoom_and\" value=\"1\" checked=\"checked\" />" . $STR_FORM_ALL_SEARCH_WORDS . "\n");
        }
        print("<input type=\"hidden\" name=\"zoom_sort\" value=\"" . $sort_html . "\" />\n");
        print("</span>\n");
    }
    else
    {
        print("<input type=\"hidden\" name=\"zoom_per_page\" value=\"" . $per_page_html . "\" />\n");
        print("<input type=\"hidden\" name=\"zoom_and\" value=\"" . $and_html . "\" />\n");
        print("<input type=\"hidden\" name=\"zoom_sort\" value=\"" . $sort_html . "\" />\n");
    }
    print("</form>\n");
}

// Give up early if no search words provided
// (compared against "" rather than empty() so that a query of "0" is searched)
if ($query == "")
{
    // only display 'no query' line if no form is shown
    if ($FormFormat == 0)
        print($STR_NO_QUERY . "<br />");

    PrintEndOfTemplate($template);
    return;
}

// Load index data files (*.zdat) ---------------------------------------------

// Load the entire pages file into an array, all URL's on the site
$urls = file('zoom_pages.zdat');

// Load the entire page titles file into an array
$titles = file('zoom_titles.zdat');

if ($DisplayMetaDesc == 1)
{
    $descriptions = file('zoom_descriptions.zdat');
    // file() keeps line endings, so strip them before comparing with the
    // placeholder text; an unreadable file is reported the same way.
    if ($descriptions == FALSE || rtrim($descriptions[0]) == "This file blank due to indexing configuration.")
    {
        print("<b>Zoom config error:</b> The zoom_descriptions.zdat file is not properly created for the search settings specified.<br />Please check that you have re-indexed your site with the search settings selected in the configuration window.<br />");
        return;
    }
}

// Open datetime file
if ($UseDateTime == 1 || $DisplayDate == 1)
{
    $fp_datetime = fopen ("zoom_datetime.zdat", "rt");
    $i = 0;
    if ($fp_datetime != FALSE)
    {
        while (!feof($fp_datetime) && $i < $NumPages)
        {
            $dateline = fgets($fp_datetime, $MaxKeyWordLineLen);
            if ($dateline !== false && strlen($dateline) > 0)
            {
                $datetime[$i] = strtotime($dateline);
                $i++;
            }
        }
        fclose($fp_datetime);
    }
    else
    {
        // no dates at all: do not try to display them either
        $DisplayDate = 0;
    }

    if ($i < $NumPages)
    {
        print("<b>Zoom config error</b>: The zoom_datetime.zdat file is invalid or not up-to-date. Please make sure you have uploaded all files from the same indexing session.<br />");
        $UseDateTime = 0;
    }
}

// Open pagetext file
if ($DisplayContext == 1 || $AllowExactPhrase == 1)
{
    $fp_pagetext = fopen("zoom_pagetext.zdat", "rb");
    if ($fp_pagetext == FALSE)
    {
        print($STR_ERR_MISSING_ZDAT_FILES);
        return;
    }

    // The test below looks at every second byte of the file start for the
    // letters "T", "h", "i", "s" - presumably a placeholder message written
    // when page text indexing is disabled.
    $teststr = fgets($fp_pagetext, 8);
    if (is_string($teststr) && strlen($teststr) >= 7 &&
        $teststr[0] == "T" && $teststr[2] == "h" && $teststr[4] == "i" && $teststr[6] == "s")
    {
        print("<b>Zoom config error:</b> The zoom_pagetext.zdat file is not properly created for the search settings specified.<br />Please check that you have re-indexed your site with the search settings selected in the configuration window.<br />");
        fclose($fp_pagetext);
        return;
    }
}

//Open keywords file
$fp_wordmap = fopen ("zoom_wordmap.zdat", "rb");

if ($urls == FALSE || $titles == FALSE || $fp_wordmap == FALSE)
{
    print($STR_ERR_MISSING_ZDAT_FILES);
    return;
}

// Load the dictionary: one entry per line, split at the first space into
// [0] the word and [1] the rest of the line (used later as the wordmap file
// offset, with -1 meaning "skipped / not indexed").
$fp_dict = fopen("zoom_dictionary.zdat", "rt");
if ($fp_dict == FALSE)
{
    print($STR_ERR_MISSING_ZDAT_FILES);
    return;
}
$i = 0;
while (!feof($fp_dict))
{
    $dictline = fgets($fp_dict, $MaxKeyWordLineLen);
    if ($dictline !== false && strlen($dictline) > 0)
    {
        $dict[$i] = explode(" ", $dictline, 2);
        $i++;
    }
}
fclose($fp_dict);
$dict_count = $i;

// Prepare query for search ---------------------------------------------------

if ($MapAccents == 1)
{
    $query = str_replace($AccentChars, $NormalChars, $query);
}

if ($ToLowerSearchWords == 1)
{
    if ($UseUTF8 == 1 && function_exists('mb_strtolower'))
        $query = mb_strtolower($query, "UTF-8");
    else
        $query = strtolower($query);
}

// prepare search query, strip quotes, trim whitespace
if ($AllowExactPhrase == 0)
{
    $query = str_replace("\"", " ", $query);
}

// Characters that are not configured as word joining characters are turned
// into word separators.
$JoinCandidates = array('.', '-', '_', "'", '#', '$', ',');
foreach ($JoinCandidates as $JoinChar)
{
    if (strspn($JoinChar, $WordJoinChars) == 0)
        $query = str_replace($JoinChar, " ", $query);
}

// strip slashes, sloshes, consecutive spaces, parenthesis, etc.
$query = preg_replace("/[\/\s\\\\(\)\^\[\]\|\+\{\}]+/", " ", $query);
$query = trim($query);

//Split search phrase into words (a double quoted phrase stays one entry)
preg_match_all("/\"(.*?)\"|[^\\s\"]+/", $query, $SearchWords);
$SearchWords = preg_replace("/\"[\s]+|[\s]+\"|\"/", "", $SearchWords[0]);

//Print heading
print("<div class=\"searchheading\">" . $STR_RESULTS_FOR . " " . htmlspecialchars($query));
if ($UseCats)
{
    if ($cat == -1)
        print(" " . $STR_RESULTS_IN_ALL_CATEGORIES);
    else
        print(" " . $STR_RESULTS_IN_CATEGORY . " \"". rtrim($catnames[$cat]) . "\"");
}
print "</div><br />\n";

print "<div class=\"results\">\n";

// Begin main search loop -----------------------------------------------------

$numwords = count ($SearchWords);
$pagesCount = count($urls);
$outputline = 0;
$matches = 0;
$UseWildCards = 1;  // default as using wildcard

// initialise $res_table to be a 2D array of count($pages) long, filled with zeros.
if (function_exists('array_fill'))
    $res_table = array_fill(0, $pagesCount, array_fill(0, 6, 0));
else
{
    $res_table = array();
    for ($i = 0; $i < $pagesCount; $i++)
    {
        $res_table[$i] = array();
        $res_table[$i][0] = 0;  // score
        $res_table[$i][1] = 0;  // num of sw matched
        $res_table[$i][2] = 0;  // pagetext ptr #1
        $res_table[$i][3] = 0;  // pagetext ptr #2
        $res_table[$i][4] = 0;  // pagetext ptr #3
        $res_table[$i][5] = 0;  // 'and' user search terms matched
    }
}

// check if word is in skipword file
$SkippedWords = 0;
$SkippedOutputStr = "";     // filled in by SkipSearchWord()
$context_maxgoback = 1;
$SkippedExactPhrase = 0;
$maxscore = 0;

// One pass per search term. Each term is either an exact phrase (contains a
// space), a wildcard pattern (contains * or ?) or a plain word, and is
// matched against every dictionary entry.
for ($sw = 0; $sw < $numwords; $sw++)
{
    if ($SearchWords[$sw] == "")
        continue;

    // check min length
    if (strlen($SearchWords[$sw]) < $MinWordLen)
    {
        SkipSearchWord($sw);
        continue;
    }

    $ExactPhrase = 0;
    $UseWildCards = 0;

    if ($AllowExactPhrase == 1 && strpos($SearchWords[$sw], " ") !== false)
    {
        // Initialise exact phrase matching for this search term
        $ExactPhrase = 1;
        // explode() instead of split(): split() no longer exists in PHP 7+
        // and the separator is a literal space, not a pattern.
        $phrase_terms = explode(" ", $SearchWords[$sw]);
        //$phrase_terms = preg_split("/\W+/", $SearchWords[$sw], -1, 0 /*PREG_SPLIT_DELIM_CAPTURE*/);
        $num_phrase_terms = count($phrase_terms);
        if ($num_phrase_terms > $context_maxgoback)
            $context_maxgoback = $num_phrase_terms;

        $phrase_terms_data = array();

        $tmpid = 0;
        $WordNotFound = 0;
        // Load the wordmap records of every term in the phrase up front
        for ($j = 0; $j < $num_phrase_terms; $j++)
        {
            $tmpid = GetDictID($phrase_terms[$j]);
            if ($tmpid == -1)   // word is not in dictionary
            {
                $WordNotFound = 1;
                break;
            }
            $wordmap_row = $dict[$tmpid][1];
            if ($wordmap_row != -1)
            {
                // the offset is read from a text line, make it a real integer
                fseek($fp_wordmap, intval($wordmap_row));
                // first 2 bytes is data count (little-endian)
                $countbytes = fread($fp_wordmap, 2);
                if (is_string($countbytes) && strlen($countbytes) == 2)
                    $phrase_data_count[$j] = ord($countbytes[0]) | ord($countbytes[1])<<8;
                else
                    $phrase_data_count[$j] = 0;
                for ($xbi = 0; $xbi < $phrase_data_count[$j]; $xbi++)
                {
                    $xbindata = fread($fp_wordmap, 8);
                    if (!is_string($xbindata) || strlen($xbindata) < 8)
                    {
                        // truncated file: keep only the complete records
                        print "error in wordmap file: expected data not found";
                        $phrase_data_count[$j] = $xbi;
                        break;
                    }
                    $phrase_terms_data[$j][$xbi] = unpack("vscore/vpagenum/Vptr", $xbindata);
                }
            }
            else
            {
                $phrase_data_count[$j] = 0;
                $phrase_terms_data[$j] = 0;
            }
        }

        if ($WordNotFound == 1)
            continue;
    }
    else if (strpos($SearchWords[$sw], "*") !== false || strpos($SearchWords[$sw], "?") !== false)
    {
        $pattern = "/";

        // match entire word
        if ($SearchAsSubstring == 0)
            $pattern = $pattern . "\A";

        // note: the search word itself is replaced by its regular expression
        $SearchWords[$sw] = pattern2regexp($SearchWords[$sw]);
        $pattern = $pattern . $SearchWords[$sw];

        if ($SearchAsSubstring == 0)
            $pattern = $pattern . "\Z";

        if ($ToLowerSearchWords != 0)
            $pattern = $pattern . "/i";
        else
            $pattern = $pattern . "/";

        $UseWildCards = 1;
    }

    for ($i = 0; $i < $dict_count; $i++)
    {
        $dictline = $dict[$i];
        $word = $dict[$i][0];

        // if we're not using wildcards, direct match
        if ($ExactPhrase == 1)
        {
            // todo: move to next phrase term if first phrase term is skipped?
            // compare first term in exact phrase
            $result = strcasecmp($phrase_terms[0], $word);
        }
        else if ($UseWildCards == 0)
        {
            if ($SearchAsSubstring == 0)
                $result = strcasecmp($SearchWords[$sw], $word);
            else
            {
                // strict test: stristr() returns the matched tail of the
                // word, and a tail of "0" must not be read as "no match"
                if (stristr($word, $SearchWords[$sw]) === false)
                    $result = 1;    // not matched
                else
                    $result = 0;    // matched
            }
        }
        else
        {
            // if we have wildcards...
            $result = !(preg_match($pattern, $word));
        }
        // result = 0 if matched, result != 0 if not matched.

        // word found but indicated to be not indexed or skipped
        if ($result == 0 && $dictline[1] == -1)
        {
            if ($UseWildCards == 0 && $SearchAsSubstring == 0)
            {
                if ($ExactPhrase == 1)
                    $SkippedExactPhrase = 1;
                SkipSearchWord($sw);
                break;
            }
            else
                continue;
        }

        if ($result == 0)
        {
            // keyword found in the dictionary
            if ($ExactPhrase == 1)
            {
                // we'll use the wordmap data for the first term that we have worked out earlier
                $data = $phrase_terms_data[0];
                $data_count = $phrase_data_count[0];
                $ContextSeeks = 0;
            }
            else
            {
                // seek to position in wordmap file
                fseek($fp_wordmap, intval($dictline[1]));
                //print "seeking in wordmap: " . $dictline[1] . "<br />";

                // first 2 bytes is data count
                $countbytes = fread($fp_wordmap, 2);
                if (is_string($countbytes) && strlen($countbytes) == 2)
                    $data_count = ord($countbytes[0]) | ord($countbytes[1])<<8;
                else
                    $data_count = 0;
                //print "data count: " . $data_count . "<br />";

                for ($bi = 0; $bi < $data_count; $bi++)
                {
                    $bindata = fread($fp_wordmap, 8);
                    if (!is_string($bindata) || strlen($bindata) < 8)
                    {
                        // truncated file: keep only the complete records
                        print "Error in wordmap file: expected data not found";
                        $data_count = $bi;
                        break;
                    }
                    $data[$bi] = unpack("vscore/vpagenum/Vptr", $bindata);
                }
            }

            // Go through wordmap for each page this word appears on
            for ($j = 0; $j < $data_count; $j++)
            {
                $score = $data[$j]["score"];
                $txtptr = $data[$j]["ptr"];

                if ($ExactPhrase == 1)
                {
                    $maxptr = $data[$j]["ptr"];
                    $maxptr_term = 0;
                    $GotoNextPage = 0;

                    // Check if all of the other words in the phrase appears on this page.
                    for ($xi = 1; $xi < $num_phrase_terms; $xi++)
                    {
                        // see if this word appears at all on this page, if not, we stop scanning page.
                        // do not check for skipped words (data count value of zero)
                        if ($phrase_data_count[$xi] != 0)
                        {
                            // check wordmap for this search phrase to see if it appears on the current page.
                            for ($xbi = 0; $xbi < $phrase_data_count[$xi]; $xbi++)
                            {
                                if ($phrase_terms_data[$xi][$xbi]["pagenum"] == $data[$j]["pagenum"])
                                {
                                    // intersection, this term appears on both pages, goto next term
                                    // remember biggest pointer.
                                    if ($phrase_terms_data[$xi][$xbi]["ptr"] > $maxptr)
                                    {
                                        $maxptr = $phrase_terms_data[$xi][$xbi]["ptr"];
                                        $maxptr_term = $xi;
                                    }
                                    $score += $phrase_terms_data[$xi][$xbi]["score"];
                                    break;
                                }
                            }
                            if ($xbi == $phrase_data_count[$xi])    // if not found
                            {
                                $GotoNextPage = 1;
                                break;  // goto next page
                            }
                        }
                    }   // end phrase term for loop

                    if ($GotoNextPage == 1)
                        continue;

                    // Check how many context seeks we have made.
                    $ContextSeeks++;
                    if ($ContextSeeks > $MaxContextSeeks)
                    {
                        // the phrase comes from the request, so escape it for HTML output
                        print "<small>" . $STR_PHRASE_CONTAINS_COMMON_WORDS . " <b>\"" . htmlspecialchars($SearchWords[$sw]) . "\"</b></small><br /><br />";
                        break;
                    }

                    // ok, so this page contains all of the words in the phrase
                    $FoundPhrase = 0;
                    $FoundFirstWord = 0;

                    // we goto the first occurance of the first word in pagetext
                    $pos = $maxptr - (($maxptr_term+3) * $MaxDictIDLen);    // assume 3 possible punctuations.
                    // do not seek further back than the occurance of the first word (avoid wrong page)
                    if ($pos < $data[$j]["ptr"])
                        $pos = $data[$j]["ptr"];
                    fseek($fp_pagetext, $pos);

                    // now we look for the phrase within the context of this page
                    do
                    {
                        for ($xi = 0; $xi < $num_phrase_terms; $xi++)
                        {
                            // do...while loop to ignore punctuation marks in context phrase
                            do
                            {
                                $xword_id = 0;
                                $bytesread = 0;
                                do
                                {
                                    $bytes_buffer = fread($fp_pagetext, 2); // grab 2 bytes
                                    // a short read (end of file) counts as a zero pair
                                    if (is_string($bytes_buffer) && strlen($bytes_buffer) == 2)
                                        $dict_id = ord($bytes_buffer[0]) | ord($bytes_buffer[1])<<8;
                                    else
                                        $dict_id = 0;
                                    $xword_id += $dict_id;
                                    $bytesread += 2;
                                } while ($dict_id >= 65535);

                                $pos += $bytesread;
                                // check if we are at the end of page (wordid = 0) or invalid $xword_id
                                if ($xword_id == 0 || $xword_id >= $dict_count)
                                    break;
                            } while ($xword_id <= $DictReservedLimit && !feof($fp_pagetext));

                            // if the words are NOT the same, we break out
                            // (an ID outside the dictionary can never be the same word)
                            if ($xword_id >= $dict_count || strcasecmp($dict[$xword_id][0], $phrase_terms[$xi]) != 0)
                                break;

                            // remember how many times we find the first word on this page
                            if ($xi == 0)
                            {
                                $FoundFirstWord++;
                                // remember the position of the 'start' of this phrase
                                //$txtptr = $pos - $MaxDictIDLen;
                                $txtptr = $pos - $bytesread;
                            }
                        }
                        if ($xi == $num_phrase_terms)
                        {
                            // exact phrase found!
                            $FoundPhrase = 1;
                        }
                    } while ($xword_id != 0 && $FoundPhrase == 0 && $FoundFirstWord <= $data[$j]["score"]);

                    if ($FoundPhrase != 1)
                        continue;   // goto next page.
                }

                //Check if page is already in output list
                $ipage = $data[$j]["pagenum"];
                // ignore records that point at a page outside the pages file
                if (!isset($res_table[$ipage]))
                    continue;

                if ($res_table[$ipage][0] == 0)
                {
                    // not in list, count this page as a unique match
                    $matches++;
                    $res_table[$ipage][0] += $score;
                    $res_table[$ipage][2] = $txtptr;
                }
                else
                {
                    // already in list
                    if ($res_table[$ipage][0] > 10000)
                    {
                        // take it easy if its too big (to prevent huge scores)
                        $res_table[$ipage][0] += 1;
                    }
                    else
                    {
                        $res_table[$ipage][0] += $score;    //Add in score
                        $res_table[$ipage][0] *= 2;         //Double Score as we have two words matching
                    }

                    // store the next two searchword matches
                    if ($res_table[$ipage][1] > 0 && $res_table[$ipage][1] < $MaxContextKeywords)
                    {
                        if ($res_table[$ipage][3] == 0)
                            $res_table[$ipage][3] = $txtptr;
                        elseif ($res_table[$ipage][4] == 0)
                            $res_table[$ipage][4] = $txtptr;
                    }
                }

                $res_table[$ipage][1] += 1;

                if ($res_table[$ipage][0] > $maxscore)
                    $maxscore = $res_table[$ipage][0];

                // store the 'and' user search terms matched' value
                if ($res_table[$ipage][5] == $sw || $res_table[$ipage][5] == $sw-$SkippedWords)
                    $res_table[$ipage][5] += 1;
            }

            if ($UseWildCards == 0 && $SearchAsSubstring == 0)
                break;  //This search word was found, so skip to next
        }
    }
}

//Close the files
fclose($fp_wordmap);

if ($SkippedWords > 0)
{
    print "<i>" . $STR_SKIPPED_FOLLOWING_WORDS . " " . $SkippedOutputStr . "<br />\n";
    if ($SkippedExactPhrase == 1)
        print $STR_SKIPPED_PHRASE . ".<br />\n";
    print "</i><br />\n";
}

//Count number of output lines that match ALL search terms
$oline = 0;
$fullmatches = 0;

// Second pass, results filtering.
$ResFiltered = false; $full_numwords = $numwords - $SkippedWords; for ($i = 0; $i < $pagesCount; $i++) { $IsFiltered = false; if ($res_table[$i][0] != 0) { if ($UseCats && $cat != -1) { // Using cats and not doing an "all cats" search if (rtrim($catpages[$i]) != $cat) $IsFiltered = true; } if ($IsFiltered == false) { //if ($res_table[$i][1] >= $full_numwords) if ($res_table[$i][5] >= $full_numwords) $fullmatches++; else { // if AND search, only copy AND results if ($and == 1) $IsFiltered = true; } } if ($IsFiltered == false) { // copy if not filtered out $output[$oline][0] = $i; // page index $output[$oline][1] = $res_table[$i][0]; // score $output[$oline][2] = $res_table[$i][1]; // num of sw matched $output[$oline][3] = $res_table[$i][2]; // pagetext ptr #1 $output[$oline][4] = $res_table[$i][3]; // pagetext ptr #2 $output[$oline][5] = $res_table[$i][4]; // pagetext ptr #3 $oline++; } else { $ResFiltered = true; } } } if ($ResFiltered) { $matches = $oline; } //Sort results in order of score, use the "SortCompare" function if ($matches > 1) { if ($sort == 1 && $UseDateTime == 1 && isset($datetime)) { usort($output, "SortByDate"); } else { // Default sort by relevance usort($output, "SortCompare"); } } // query_out is the query prepared to be passed in a URL. //$query_out = htmlspecialchars($query_out); $query_out = urlencode($query); //Display search result information print("<div class=\"summary\">\n"); if ($matches == 0) print $STR_SUMMARY_NO_RESULTS_FOUND; elseif ($numwords > 1 && $and == 0) { //OR $SomeTermMatches = $matches - $fullmatches; print PrintNumResults($fullmatches) . " " . $STR_SUMMARY_FOUND_CONTAINING_ALL_TERMS . " "; if ($SomeTermMatches > 0) print PrintNumResults($SomeTermMatches) . " " . $STR_SUMMARY_FOUND_CONTAINING_SOME_TERMS; } elseif ($numwords > 1 && $and == 1) //AND print PrintNumResults($fullmatches) . " " . $STR_SUMMARY_FOUND_CONTAINING_ALL_TERMS; else print PrintNumResults($matches) . " " . 
$STR_SUMMARY_FOUND; print "<br />"; if ($matches < 3) { if ($and == 1 && $numwords > 1) print "<br />" . $STR_POSSIBLY_GET_MORE_RESULTS . " <a href=\"".$SelfURL."?zoom_query=".$query_out."&zoom_page=".$page."&zoom_per_page=".$per_page."&zoom_cat=".$cat."&zoom_and=0&zoom_sort=".$sort."\">". $STR_ANY_OF_TERMS . "</a>.<br />"; else if ($UseCats && $cat != -1) print "<br />" . $STR_POSSIBLY_GET_MORE_RESULTS . " <a href=\"".$SelfURL."?zoom_query=".$query_out."&zoom_page=".$page."&zoom_per_page=".$per_page."&zoom_cat=-1&zoom_and=".$and."&zoom_sort=".$sort."\">" . $STR_ALL_CATS . "</a>.<br />"; } print "</div>\n"; if ($Spelling == 1 && $matches < $SpellingWhenLessThan) { // load in spellings file $fp_spell = fopen("zoom_spelling.zdat", "rt"); $i = 0; while (!feof($fp_spell)) { $spline = fgets($fp_spell, $MaxKeyWordLineLen); if (strlen($spline) > 0) { $spell[$i] = explode(" ", $spline, 2); $i++; } } fclose($fp_spell); $spell_count = $i; $SuggestStr = ""; $SuggestionFound = 0; $SuggestionCount = 0; for ($sw = 0; $sw < $numwords; $sw++) { // word does not match, check if it is a candidate to be a suggestion $sw_spcode = GetSPCode($SearchWords[$sw]); if (strlen($sw_spcode) == 4) { $SuggestionFound = 0; for ($i = 0; $i < $spell_count; $i++) { $spcode = $spell[$i][0]; $dictid = intval($spell[$i][1]); $word = $dict[$dictid][0]; $ptr = $dict[$dictid][1]; if ($spcode == $sw_spcode) { if ($ptr == -1 || strcasecmp($word, $SearchWords[$sw]) == 0) { // Check that it is not a skipped word or the same word $SuggestionFound = 0; break; } else { $SuggestionFound = 1; $SuggestionCount++; break; } } elseif (strcmp($spcode, $sw_spcode) > 0) { break; } if ($SuggestionFound == 1) break; } if ($sw > 0) $SuggestStr = $SuggestStr . " "; if ($SuggestionFound == 1) $SuggestStr = $SuggestStr . $word; // add string AFTER so we can preserve order of words else $SuggestStr = $SuggestStr . $SearchWords[$sw]; } } if ($SuggestionCount > 0) { print "<br />" . $STR_DIDYOUMEAN . 
" <a href=\"".$SelfURL."?zoom_query=".urlencode($SuggestStr)."&zoom_page=".intval($page)."&zoom_per_page=".intval($per_page)."&zoom_cat=".$cat."&zoom_and=0&zoom_sort=".$sort."\">". $SuggestStr . "</a>?<br />";
    }
}

// ----------------------------------------------------------------------------
// Results page output
// ----------------------------------------------------------------------------
// From here to the end of the script we print: the page count, the optional
// sort links, the current page of results taken from $output, the links to
// the other result pages, the optional timing line and the optional log
// entry. Finally the end of the template is printed.

// $page and $per_page are copied straight from $_GET at the top of the script.
// Force both to positive integers before they are used as a divisor, as an
// index into $output, or echoed into link URLs. Without this a request with
// "zoom_per_page=0" divides by zero below, and arbitrary request text would be
// written into the href attributes.
$per_page = intval($per_page);
if ($per_page < 1)
    $per_page = 10;     // same default as when zoom_per_page is not supplied
$page = intval($page);
if ($page < 1)
    $page = 1;

// NOTE(review): $cat, $and and $sort are also written into the links below
// without escaping. Presumably they are request parameters too - confirm they
// are validated where they are read.

// Number of pages of results
$num_pages = ceil($matches / $per_page);
if ($num_pages > 1)
    print "<br />" . $num_pages . " " . $STR_PAGES_OF_RESULTS . "<br />\n";

// Show sorting options
if ($matches > 1)
{
    if ($UseDateTime == 1)
    {
        // The active sort order is shown in bold, the other one as a link.
        print("<div class=\"sorting\">");
        if ($sort == 1)
            print("<a href=\"".$SelfURL."?zoom_query=".$query_out."&zoom_page=".$page."&zoom_per_page=".$per_page."&zoom_cat=".$cat."&zoom_and=".$and."&zoom_sort=0\">". $STR_SORTBY_RELEVANCE . "</a> / <b>". $STR_SORTEDBY_DATE . "</b>");
        else
            print("<b>". $STR_SORTEDBY_RELEVANCE . "</b> / <a href=\"".$SelfURL."?zoom_query=".$query_out."&zoom_page=".$page."&zoom_per_page=".$per_page."&zoom_cat=".$cat."&zoom_and=".$and."&zoom_sort=1\">". $STR_SORTBY_DATE . "</a>");
        print("</div>");
    }
}

// Determine current line of result from the $output array
if ($page == 1)
{
    $arrayline = 0;
}
else
{
    $arrayline = (($page - 1) * $per_page);
}

// The last result to show on this page
$result_limit = $arrayline + $per_page;

// Display the results.
// Each $output row is read as: [0] page index (into $urls, $titles, ...),
// [1] score, [2] number of terms matched, [3..] positions in the page text
// file, one per matched term (used for the context extracts).
while ($arrayline < $matches && $arrayline < $result_limit)
{
    $ipage = $output[$arrayline][0];
    $score = $output[$arrayline][1];

    print "<p></p>\n";
    print "<div class=\"result_title\">";
    if ($DisplayNumber == 1)
        print "<b>".($arrayline+1).".</b> ";

    if ($DisplayTitle == 1)
    {
        // Link text is the page title; with $GotoHighlight the query is
        // passed on to the target page in the URL.
        if ($GotoHighlight == 1)
        {
            if ($SearchAsSubstring == 1)
                print "<a href=\"".rtrim($urls[$ipage])."?zoom_highlightsub=". $query_out . "\"" . $target . ">";
            else
                print "<a href=\"".rtrim($urls[$ipage])."?zoom_highlight=". $query_out . "\"" . $target . ">";
        }
        else
            print "<a href=\"".rtrim($urls[$ipage])."\"" . $target . ">";

        if ($Highlighting == 1)
            PrintHighlightDescription(rtrim($titles[$ipage]));
        else
            print rtrim($titles[$ipage]);
        print "</a>";
    }
    else
        // No title wanted: the URL itself is the link text.
        print "<a href=\"".rtrim($urls[$ipage])."\"" . $target . ">".rtrim($urls[$ipage])."</a>";

    if ($UseCats)
    {
        $catindex = rtrim($catpages[$ipage]);
        print " <span class=\"category\">[". rtrim($catnames[$catindex]) . "]</span>";
    }
    print "</div>\n";

    if ($DisplayMetaDesc == 1)
    {
        // Print meta description
        if (strlen($descriptions[$ipage]) > 2)
        {
            print("<div class=\"description\">");
            if ($Highlighting == 1)
                PrintHighlightDescription(rtrim($descriptions[$ipage]));
            else
                print rtrim($descriptions[$ipage]);
            print "<b>...</b></div>\n";
        }
    }

    if ($DisplayContext == 1)
    {
        // Extract contextual page content
        $context_keywords = $output[$arrayline][2];     // # of terms matched
        if ($context_keywords > $MaxContextKeywords)
            $context_keywords = $MaxContextKeywords;

        // $ContextSize words are shared between the extracts. When there are
        // no keywords the extraction loop below runs zero times, so a divisor
        // of 1 only serves to avoid dividing by zero.
        $context_divisor = ($context_keywords > 0) ? $context_keywords : 1;
        $context_word_count = ceil($ContextSize / $context_divisor);

        $goback = floor($context_word_count / 2);
        $gobackbytes = $goback * $MaxDictIDLen;
        if (($gobackbytes / 2) > ($context_word_count - $context_maxgoback - $goback)) // 2 is MinDictIDLen
        {
            // go back less if potential for matched word to be outside the context range
            // determine most bytes we should go back to fit the word in in case of all dict ID's were min. len.
            $gobackbytes = 2 * ($context_word_count - $context_maxgoback - $goback);
            // determine number of words available with this number of bytes, if all dict ID's were max. len.
            // thus avoiding jumping into the middle of a multi-pair dictID value
            $goback = floor($gobackbytes / $MaxDictIDLen);
            // redetermine max bytes to jump back for this number of words
            $gobackbytes = $goback * $MaxDictIDLen;
        }

        $last_startpos = 0;
        $last_endpos = 0;
        $FoundContext = 0;

        print "<div class=\"context\">\n";
        for ($j = 0; $j < $context_keywords && !feof($fp_pagetext); $j++)
        {
            // Start reading $gobackbytes before the matched word, unless that
            // would take us before the start of the file.
            $origpos = $output[$arrayline][3 + $j];
            $startpos = $origpos;
            if ($gobackbytes < $startpos)
            {
                $startpos = $startpos - $gobackbytes;
                $noGoBack = false;
            }
            else
                $noGoBack = true;

            //if ($startpos < 0)
            //    $startpos = 0;

            // Check that this will not overlap with previous extract
            if ($startpos > $last_startpos && $startpos < $last_endpos)
                $startpos = $last_endpos;   // we will just continue last extract if so.

            // find the pagetext pointed to
            fseek($fp_pagetext, $startpos);

            // remember the last start position
            $last_startpos = $startpos;

            $word_id = GetNextDictWord($fp_pagetext);
            //print "wordid: " . $word_id;
            $context_str = "";
            $noSpaceForNextChar = false;
            for ($i = 0; $i < $context_word_count && !feof($fp_pagetext); $i++)
            {
                if ($noSpaceForNextChar == false)
                {
                    //if ($word_id > $DictReservedLimit) // no space for reserved words (punctuation, etc)
                    // No space for reserved words (punctuation, etc)
                    if ($word_id > $DictReservedNoSpaces)
                        $context_str .= " ";
                    elseif ($word_id > $DictReservedSuffixes && $word_id <= $DictReservedPrefixes)
                    {
                        // This is a Prefix character
                        $context_str .= " ";
                        $noSpaceForNextChar = true;
                    }
                    elseif ($word_id > $DictReservedPrefixes)   // this is a nospace character
                        $noSpaceForNextChar = true;
                }
                else
                    $noSpaceForNextChar = false;

                if ($word_id == 0 || $word_id >= $dict_count)   // check if end of page
                {
                    // if end of page occurs AFTER word pointer (ie: reached next page)
                    if ($noGoBack || ftell($fp_pagetext) > $origpos)
                        break;  // then we stop.
                    else        // if end of page occurs BEFORE word pointer (ie: reached previous page)
                    {
                        $context_str = "";  // then we clear the existing context buffer we've created.
                        $i = 0;             // (the loop's own $i++ still runs after this)
                    }
                }
                else
                    $context_str .= $dict[$word_id][0];

                $word_id = GetNextDictWord($fp_pagetext);
            }

            // remember the last end position
            $last_endpos = ftell($fp_pagetext);

            if (strcmp(trim($context_str), trim($titles[$ipage])) == 0)
            {
                $context_str = "";  // clear the string if its identical to the title
            }

            if ($context_str != "")
            {
                print " <b>...</b> ";
                $FoundContext = 1;
                if ($Highlighting == 1)
                    PrintHighlightDescription($context_str);
                else
                    print $context_str;
            }
        }
        if ($FoundContext == 1)
            print " <b>...</b>";
        print "</div>\n";
    }

    // Info line: the enabled items, joined with " - ".
    $info_str = "";
    if ($DisplayTerms == 1)
    {
        $info_str .= $STR_RESULT_TERMS_MATCHED . " ". $output[$arrayline][2];
    }
    if ($DisplayScore == 1)
    {
        if (strlen($info_str) > 0)
            $info_str .= " - ";
        $info_str .= $STR_RESULT_SCORE . " " . $score;
    }
    if ($DisplayDate == 1)
    {
        if (strlen($info_str) > 0)
            $info_str .= " - ";
        //$info_str .= date("j M Y @ g:i A", $datetime[$ipage]) ." - ";
        $info_str .= date("j M Y", $datetime[$ipage]);
    }
    if ($DisplayURL == 1)
    {
        if (strlen($info_str) > 0)
            $info_str .= " - ";
        $info_str .= $STR_RESULT_URL . " ".rtrim($urls[$ipage]);
    }
    print "<div class=\"infoline\">";
    print $info_str;
    print "</div>\n";

    $arrayline++;
}

if ($DisplayContext == 1 || $AllowExactPhrase == 1)
    fclose($fp_pagetext);

// Show links to other result pages
if ($num_pages > 1)
{
    // Link to at most 10 pages to the left of the current page
    $start_range = $page - 10;
    if ($start_range < 1)
        $start_range = 1;

    // and at most 10 to the right
    $end_range = $page + 10;
    if ($end_range > $num_pages)
        $end_range = $num_pages;

    // Every page link differs only in its zoom_page value, so the unchanging
    // parts of the anchor tag are built once.
    $link_head = "<a href=\"".$SelfURL."?zoom_query=".$query_out."&zoom_page=";
    $link_tail = "&zoom_per_page=".$per_page."&zoom_cat=".$cat."&zoom_and=".$and."&zoom_sort=".$sort."\">";

    print "<p></p>\n" . $STR_RESULT_PAGES . " ";
    if ($page > 1)
        print $link_head.($page-1).$link_tail."&lt;&lt; " . $STR_RESULT_PAGES_PREVIOUS . "</a> ";

    for ($i = $start_range; $i <= $end_range; $i++)
    {
        if ($i == $page)
        {
            // the current page is shown as plain text
            print $page." ";
        }
        else
        {
            print $link_head.$i.$link_tail.$i."</a> ";
        }
    }

    // No "next" link on the last page, nor when the requested page number
    // is already past it.
    if ($page < $num_pages)
        print $link_head.($page+1).$link_tail . $STR_RESULT_PAGES_NEXT . " &gt;&gt;</a> ";
}

print "</div>"; // end of results style tag

if ($Timing == 1 || $Logging == 1)
{
    // Elapsed time since $starttime, in seconds to 3 decimal places.
    // microtime() returns "msec sec"; the two parts are summed.
    $mtime = explode(" ", microtime());
    $endtime = doubleval($mtime[1]) + doubleval($mtime[0]);
    $difference = abs($starttime - $endtime);
    $timetaken = number_format($difference, 3, '.', '');
    if ($Timing == 1)
        print "<br /><br /><small>" . $STR_SEARCH_TOOK . " " . $timetaken . " " . $STR_SECONDS . "</small><br />\n";
}

//Log the search words, if required
if ($Logging == 1)
{
    // One comma-separated line per search. Line breaks are removed from the
    // query first so that a crafted query cannot start a forged log entry,
    // then embedded double quotes are doubled because the value is written
    // inside quotes.
    $LogQuery = str_replace("\r", " ", $query);
    $LogQuery = str_replace("\n", " ", $LogQuery);
    $LogQuery = str_replace("\"", "\"\"", $LogQuery);

    $LogString = date("Y-m-d, H:i:s") . ", " . $_SERVER['REMOTE_ADDR'] . ", \"" .$LogQuery . "\", Matches = " . $matches;

    if ($and == 1)
        $LogString = $LogString . ", AND";
    else
        $LogString = $LogString . ", OR";

    // A search that arrived without a zoom_page parameter is logged as page 0.
    if ($NewSearch == 1)
        $page = 0;
    $LogString = $LogString . ", PerPage = " . $per_page . ", PageNum = " . $page;

    if ($UseCats == 0)
        $LogString = $LogString . ", No cats";
    else
    {
        if ($cat == -1)
            $LogString = $LogString . ", \"Cat = All\"";
        else
        {
            $logCatStr = trim($catnames[$cat]);
            $logCatStr = str_replace("\"", "\"\"", $logCatStr);
            $LogString = $LogString . ", \"Cat = " . $logCatStr . "\"";
        }
    }

    $LogString = $LogString . ", Time = " . $timetaken;

    // end of entry
    $LogString = $LogString . "\r\n";

    // Append to the log; a failure to open it is reported on the page.
    $fp = fopen ($LogFileName, "ab");
    if ($fp != false)
    {
        fputs ($fp, $LogString);
        fclose ($fp);
    }
    else
    {
        print "Unable to write to log file (" . $LogFileName . "). \n" .
              "Check that you have specified the correct log filename in your Indexer settings and that you have the required file permissions set.<br />";
    }
}

//Print out the end of the template
PrintEndOfTemplate($template);
?>